############ Daily Deviation from Moving Average

rm(list = ls())

### libraries and packages
pkg <- c("plyr", "dplyr", "tidyr", 
         "MASS", "multiwayvcov", "fracdiff",
         "fractal",
         "lme4", "ArfimaMLM","gam",
         "forecast",
         "stargazer", "lmtest", "doBy", "ggplot2", 
         "mediation", "vars", "DataCombine", "foreign",
         "mgcv", "ordinal", "reshape2", 
         "xtable", "sandwich", "rms")
lapply(pkg, require, character.only = TRUE)

# rid of E
options(scipen=999)
### setting wd for inputs
setwd("~/Dropbox/Pollution and Public Perceptions in China/Data and Analysis/Data")

# loading the data
load("ppp_cleaned")

# aqi.lagged3
start <- 1 # setting up a "starting value" to facilitate the value generating scheme
p2$aqi.lagged3 <- NA # creating the lagged aqi variable, initially assigning NAs to all of the entries
for (i in 1:length(unique(p2$day))) { # loop over every day at the aggregated level
  span.i <- nrow(p2[p2$day == i,]) + start - 1 # today
  span.i2 <- nrow(p2[p2$day == i+1,]) + span.i # tomorrow
  
  value.to.be.copied <- unique(p2$aqi.lagged2[start:span.i]) # from the starting value to the end of a day
  p2$aqi.lagged3[(span.i+1):span.i2] <- value.to.be.copied # from the starting value of each day to the next
  
  start <- span.i + 1 # updating the starting value
} # we ignore all the error messages 
cbind(p2$aqi.lagged2, p2$aqi.lagged3, p2$day)

# aqi.lagged4
start <- 1 # setting up a "starting value" to facilitate the value generating scheme
p2$aqi.lagged4 <- NA # creating the lagged aqi variable, initially assigning NAs to all of the entries
for (i in 1:length(unique(p2$day))) { # loop over every day at the aggregated level
  span.i <- nrow(p2[p2$day == i,]) + start - 1 # today
  span.i2 <- nrow(p2[p2$day == i+1,]) + span.i # tomorrow
  
  value.to.be.copied <- unique(p2$aqi.lagged3[start:span.i]) # from the starting value to the end of a day
  p2$aqi.lagged4[(span.i+1):span.i2] <- value.to.be.copied # from the starting value of each day to the next
  
  start <- span.i + 1 # updating the starting value
} # we ignore all the error messages 
cbind(p2$aqi.lagged3, p2$aqi.lagged4, p2$day)

# aqi.lagged5
start <- 1 # setting up a "starting value" to facilitate the value generating scheme
p2$aqi.lagged5 <- NA # creating the lagged aqi variable, initially assigning NAs to all of the entries
for (i in 1:length(unique(p2$day))) { # loop over every day at the aggregated level
  span.i <- nrow(p2[p2$day == i,]) + start - 1 # today
  span.i2 <- nrow(p2[p2$day == i+1,]) + span.i # tomorrow
  
  value.to.be.copied <- unique(p2$aqi.lagged4[start:span.i]) # from the starting value to the end of a day
  p2$aqi.lagged5[(span.i+1):span.i2] <- value.to.be.copied # from the starting value of each day to the next
  
  start <- span.i + 1 # updating the starting value
} # we ignore all the error messages 
cbind(p2$aqi.lagged4, p2$aqi.lagged5, p2$day)

# aqi.lagged6
start <- 1 # setting up a "starting value" to facilitate the value generating scheme
p2$aqi.lagged6 <- NA # creating the lagged aqi variable, initially assigning NAs to all of the entries
for (i in 1:length(unique(p2$day))) { # loop over every day at the aggregated level
  span.i <- nrow(p2[p2$day == i,]) + start - 1 # today
  span.i2 <- nrow(p2[p2$day == i+1,]) + span.i # tomorrow
  
  value.to.be.copied <- unique(p2$aqi.lagged5[start:span.i]) # from the starting value to the end of a day
  p2$aqi.lagged6[(span.i+1):span.i2] <- value.to.be.copied # from the starting value of each day to the next
  
  start <- span.i + 1 # updating the starting value
} # we ignore all the error messages 
cbind(p2$aqi.lagged5, p2$aqi.lagged6, p2$day)

# aqi.lagged7
start <- 1 # setting up a "starting value" to facilitate the value generating scheme
p2$aqi.lagged7 <- NA # creating the lagged aqi variable, initially assigning NAs to all of the entries
for (i in 1:length(unique(p2$day))) { # loop over every day at the aggregated level
  span.i <- nrow(p2[p2$day == i,]) + start - 1 # today
  span.i2 <- nrow(p2[p2$day == i+1,]) + span.i # tomorrow
  
  value.to.be.copied <- unique(p2$aqi.lagged6[start:span.i]) # from the starting value to the end of a day
  p2$aqi.lagged7[(span.i+1):span.i2] <- value.to.be.copied # from the starting value of each day to the next
  
  start <- span.i + 1 # updating the starting value
} # we ignore all the error messages 
cbind(p2$aqi.lagged6, p2$aqi.lagged7, p2$day)

# get a moving weekly average
WEEK_TOTAL <- p2$aqi.lagged + p2$aqi.lagged2 + p2$aqi.lagged3 + p2$aqi.lagged4 + p2$aqi.lagged5 + p2$aqi.lagged6 + p2$aqi.lagged7
p2$MA <- WEEK_TOTAL/7
p2$MA
p2$MA_d <- p2$aqi - p2$MA
p2$MA_d

# standardizing MA_d
p2$MA_d <- (p2$MA_d-mean(p2$MA_d, na.rm = T))/(sd(p2$MA_d, na.rm = T))
sd(p2$MA_d, na.rm = T)

# basic OLS with moving weekly averages
l_sat <- lm(l_sat ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(l_sat, p2$day)
coeftest(l_sat, m.vcov) # significant

c_sat <- lm(c_sat ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(c_sat, p2$day)
coeftest(c_sat, m.vcov) # significant

l_watch <- lm(l_watch ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(l_watch, p2$day)
coeftest(l_watch, m.vcov) # significant

c_watch <- lm(c_watch ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(c_watch, p2$day) 
coeftest(c_watch, m.vcov) 

anti_west <- lm(anti_west ~ MA_d + parade + national, data = p2)
m.vcov <- cluster.vcov(anti_west, p2$day)
coeftest(anti_west, m.vcov) 

day_pollute <- lm(day_pollute ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(day_pollute, p2$day)
coeftest(day_pollute, m.vcov) 

life_sat <- lm(life_sat ~ MA_d + parade + national, data = p2) 
m.vcov <- cluster.vcov(life_sat, p2$day)
coeftest(life_sat, m.vcov) 

## Adding demographic controls
# aqi and government satisfaction at the local level
l_satw <- lm(l_sat
             ~ MA_d + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov1 <-cluster.vcov(l_satw, p2$day)  
sew1 <- coeftest(l_satw, m.vcov1) 

# aqi and anti-west sentiment
c_satw <- lm(c_sat
             ~ MA_d + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(c_satw, p2$day)  
sew2 <- coeftest(c_satw, m.vcov25) 

# aqi and the demand for more watchdog at the local level
l_watchw <- lm(l_watch
               ~ MA_d + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov13 <-cluster.vcov(l_watchw, p2$day)  
sew3 <- coeftest(l_watchw, m.vcov13) 

# aqi and the demand for more watchdog at the central level 
c_watchw <- lm(c_watch
               ~ MA_d + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov14 <-cluster.vcov(c_watchw, p2$day)  
sew4 <- coeftest(c_watchw, m.vcov14) 


# aqi and anti-west sentiment
anti_westw <- lm(anti_west
                 ~ MA_d + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(anti_westw, p2$day)  
sew6 <- coeftest(anti_westw, m.vcov25) 


# aqi and life satisfaction
life_satw <- lm(life_sat
                ~ MA_d + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(life_satw, p2$day)  
sew8 <- coeftest(life_satw, m.vcov25) 

# Making a Table
column.labels2 <- c("Local Government", "Central Government", "Local Oversight", "Central Oversight",  "Anti-West")

cov.labs.c2 <-c("AQI deviation","Hukou Status", "Regime Insider", "SOE Employee", "Education","Children","Married","Income", "Female", "CCP Member","Parade Period","Holiday","Age")
## TABLE A14
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/DMW.tex")
stargazer(l_satw, c_satw, l_watchw, c_watchw, anti_westw, font.size = "large", digits = 4, 
          column.labels = column.labels2, 
          covariate.labels = cov.labs.c2,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          star.cutoffs = c(.10, .05, .01), omit = c("y>=2", "y>=3", "y>=4", "y>=5"),
          se = list(sew1[,2], sew2[,2], sew3[, 2], sew4[, 2], sew6[, 2]),
          header = F, notes = "robust standard errors clustered at the day level in parentheses", float = F)
sink()
